In [ ]:
import pandas as pd
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import pyodbc
import pymssql
import sqlalchemy
import holoviews as hv
from shapely import wkt
import psrcelmerpy
import ogr
import json
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure
from bokeh.tile_providers import get_provider, Vendors
from bokeh.plotting import figure, output_file, show, output_notebook
import plotly.graph_objects as go
import plotly.express as pex
from time import gmtime, strftime

Inrix Origin-Destinations (OD) freight data analysis¶

About the data¶

Inrix provided us data for May 2015 and May 2019 freight movements. The data includes the following:

  • Trips Origin-Destination Information - includes information about trips such as trips start/end location, start/end time, origin/destination zone, trip length, etc.

  • Trajectory Information - contains trajectories for all of the trips listed in the trip table.

In [ ]:
#load 2019 data
data_path = r'\\AWS-Model10\Model Data 2\INRIX'
os.chdir(data_path)
In [ ]:
trips_header = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201905\date=2020-12-29\reportId=45749\v1', 'schema', 'TripBulkReportTripsHeaders.csv'))
trips_df = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201905\date=2020-12-29\reportId=45749\v1\data\trips\trips.csv.gz'), 
                       compression='gzip', names = trips_header.columns)

2019 data

There are about 74,500 unique device IDs - meaning that INRIX data captures 74,500 trucks making 1.2 millions of trips in the PSRC region.
On average, each truck completed about 16 trips in May. Although median is 3 trips. Such a difference between average and median is due to outliers in the data - some trucks did more than 600 trips during May 2019, while 75% of all of the trucks have up to 7 trips.

In [ ]:
devices =trips_df.groupby('DeviceId')['DeviceId'].count().reset_index(name='count').sort_values(by=['count'])
#devices.describe()
In [ ]:
providers =trips_df.groupby('ProviderId')['ProviderId'].count().reset_index(name='count').sort_values(by=['count'])
#providers.describe()
In [ ]:
#devices.tail(30)
In [ ]:
device_test = trips_df[trips_df["DeviceId"] == '15bfd6b38a6b00cd53b5f0de44368dc9']
In [ ]:
#device_test
In [ ]:
trips_header_2015 = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201505\date=2020-12-29\reportId=45748\v1', 'schema', 'TripBulkReportTripsHeaders.csv'))
trips_df_2015 = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201505\date=2020-12-29\reportId=45748\v1\data\trips\trips.csv.gz'), 
                       compression='gzip', names = trips_header_2015.columns)

2015 data
There are about 87,000 unique device IDs.
Similar to the May 2019 data, the average number of trips per truck is greater than median which is indicative of outliers in the data. This means that there are some trucks that make many more trips comparing to the majority of trucks. On average, in May 2015, each truck completed about 13 trips in May. Although median is 4 trips. 75% of all of the trucks completed up to 8 trips.
The maximum number of trips recorded for one device ID is 549 trips.

In [ ]:
devices_2015 =trips_df_2015.groupby('DeviceId')['DeviceId'].count().reset_index(name='count').sort_values(by=['count'])
#devices_2015.describe()
In [ ]:
providers_2015 =trips_df_2015.groupby('ProviderId')['ProviderId'].count().reset_index(name='count').sort_values(by=['count'])
#providers_2015.describe()

In total, there were 1,242,924 trips recorded in 2019 data and 1,179,583 trips in 2015 data

In [ ]:
#number of trips recroded in 2019
#len(pd.unique(trips_df['TripId'])) 
In [ ]:
#number of trips recroded in 2015
#len(pd.unique(trips_df_2015['TripId'])) 

Vehicle types in the dataset

Inrix data includes medium trucks/vans (ranges from 14001–26000 lb) and heavy trucks (> 26000 lb) in both 2015 and 2019.

In 2019, there were 1,013,601 made by medium trucks and 229,334 trips by heavy trucks.

In 2015, there were 841,373 made by medium trucks and 338,223 trips by heavy trucks.

In [ ]:
#trips by vehicle class in 2019
#trips_df.groupby('VehicleWeightClass').size()
In [ ]:
#trips_df.groupby('VehicleWeightClass').DeviceId.nunique()
In [ ]:
#trips by vehicle class in 2015
#trips_df_2015.groupby('VehicleWeightClass').size()
In [ ]:
#trips_df_2015.groupby('VehicleWeightClass').DeviceId.nunique()

Dates covered

Data covers the following dates in 2019:
May 1st - 31st 2019
Number of trips during an average weekday ~47-55k
Number of trips during an average weekend ~9-18k

in 2015:
May 1st - 31st 2015
Number of trips during an average weekday ~41-69k
Number of trips during an average weekend ~11-24k

In [ ]:
trips_df['StartDate_upd'] = (pd.to_datetime(trips_df['StartDate'])
                     .dt.tz_convert('America/Los_Angeles'))  
In [ ]:
#trips_df['StartDate_upd'] = pd.to_datetime(trips_df["StartDate"])
trips_df["StartDate_dmy"] = trips_df["StartDate_upd"].apply(lambda x: x.strftime('%d%m%Y'))
trips_df["StartTime_h"] = trips_df["StartDate_upd"].apply(lambda x: x.strftime('%H'))

trips_df['EndDate_upd'] = pd.to_datetime(trips_df["EndDate"])
trips_df["EndDate_dmy"] = trips_df["EndDate_upd"].apply(lambda x: x.strftime('%d%m%Y'))
trips_df["EndTime_h"] = trips_df["EndDate_upd"].apply(lambda x: x.strftime('%H'))

trips_df_2015['StartDate_upd'] = pd.to_datetime(trips_df_2015["StartDate"])
trips_df_2015["StartDate_dmy"] = trips_df_2015["StartDate_upd"].apply(lambda x: x.strftime('%d%m%Y'))
trips_df_2015["StartTime_h"] = trips_df_2015["StartDate_upd"].apply(lambda x: x.strftime('%H'))
In [ ]:
trips_df["StartDOW"] = trips_df["StartDate_upd"].apply(lambda x: x.strftime('%A'))
trips_df["EndDOW"] = trips_df["EndDate_upd"].apply(lambda x: x.strftime('%A'))
trips_df_2015["StartDOW"] = trips_df_2015["StartDate_upd"].apply(lambda x: x.strftime('%A'))
In [ ]:
#weekday trips only
weekdays = ["Monday", "Tuesday","Wednesday","Thursday","Friday"]
trips_df_weekdays = trips_df[trips_df.StartDOW.isin(weekdays)]
end_trips_df_weekdays = trips_df[trips_df.EndDOW.isin(weekdays)]
trips_df_2015_weekdays = trips_df_2015[trips_df_2015.StartDOW.isin(weekdays)]
In [ ]:
#separating heavy and medium trucks
trips_df_heavy = trips_df_weekdays[trips_df_weekdays['VehicleWeightClass']==3]
trips_df_medium = trips_df_weekdays[trips_df_weekdays['VehicleWeightClass']==2]
In [ ]:
trips_df_heavy_2015 = trips_df_2015[trips_df_2015['VehicleWeightClass']==3]
trips_df_medium_2015 = trips_df_2015[trips_df_2015['VehicleWeightClass']==2]
In [ ]:
end_trips_df_heavy = end_trips_df_weekdays[end_trips_df_weekdays['VehicleWeightClass']==3]
end_trips_df_medium = end_trips_df_weekdays[end_trips_df_weekdays['VehicleWeightClass']==2]

Trips by type of truck and time of day during a weekday

In [ ]:
m_trips_hour = trips_df_medium.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
m_trips_hour_agg = m_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()

end_m_trips_hour = end_trips_df_medium.groupby(['EndDOW','EndTime_h',"StartDate_dmy"]).size().reset_index(name='count')
end_m_trips_hour_agg = end_m_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2398152822.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  m_trips_hour_agg = m_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2398152822.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  end_m_trips_hour_agg = end_m_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()

Medium Trucks Volume by day of week and hour of the day

In May 2019, medium truck volumes are higher Tuesday, Wednesday, and Thursday. Medium truck volumes are lower during Monday and Friday. The reason that an average Mondays' volumes are lower during May 2019 is due to Memorial Day.

On average, all weekdays have two pronounced AM and noon peaks: one at 7 AM and another one at 11 AM-12 PM.

In [ ]:
output_notebook()
p = figure(title="Medium Trucks Volume by Hour", x_axis_label='Time', y_axis_label='Number of Trips')

p.line(x='StartTime_h', y='count', source = m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == 'Monday'],  
       legend_label="Monday", line_width=2)
p.line(x='StartTime_h', y='count', source = m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == 'Tuesday'],  
       legend_label="Tuesday", line_width=2,line_color='#d95f02')
p.line(x='StartTime_h', y='count', source = m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == 'Wednesday'],  
       legend_label="Wednesday", line_width=2,line_color='#7570b3')
p.line(x='StartTime_h', y='count', source = m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == 'Thursday'],  
       legend_label="Thursday", line_width=2,line_color='#e7298a')
p.line(x='StartTime_h', y='count', source = m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == 'Friday'],  
       legend_label="Friday", line_width=2,line_color='#1b9e77')

hover = HoverTool()
hover.tooltips=[
    ('Average Trips', '@count'),
    ('Time', '@StartTime_h'),
    ('Day of Week', '@StartDOW')
]

p.add_tools(hover)
p.legend.location = "top_left"
show(p)
Loading BokehJS ...
In [ ]:
h_trips_hour = trips_df_heavy.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()

end_h_trips_hour = end_trips_df_heavy.groupby(['EndDOW','EndTime_h',"EndDate_dmy"]).size().reset_index(name='count')
end_h_trips_hour_agg = end_h_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\4144757500.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\4144757500.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  end_h_trips_hour_agg = end_h_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()

Heavy Trucks Volume by day of week and hour of the day

In May 2019, heavy truck volumes are the highest during Tuesday, Wednesday, and Thursday. Medium truck volumes are slightly lower during Monday and Friday. Sumular to medium trucks volume, Mondays' volumes might be lower due to the Memorial Day.

Similar to the medium trucks volume, heavy trucks volume has two pronounced peaks: one at 10 AM (which is more spread out) and another one at 4 PM.

In [ ]:
output_notebook()
p = figure(title="Heavy Trucks Volume by Hour", x_axis_label='Time', y_axis_label='Number of Trips')

p.line(x='StartTime_h', y='count', source = h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == 'Monday'],  
       legend_label="Monday", line_width=2)
p.line(x='StartTime_h', y='count', source = h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == 'Tuesday'],  
       legend_label="Tuesday", line_width=2,line_color='#d95f02')
p.line(x='StartTime_h', y='count', source = h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == 'Wednesday'],  
       legend_label="Wednesday", line_width=2,line_color='#7570b3')
p.line(x='StartTime_h', y='count', source = h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == 'Thursday'],  
       legend_label="Thursday", line_width=2,line_color='#e7298a')
p.line(x='StartTime_h', y='count', source = h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == 'Friday'],  
       legend_label="Friday", line_width=2,line_color='#1b9e77')

hover = HoverTool()
hover.tooltips=[
    ('Average Trips', '@count'),
    ('Time', '@StartTime_h'),
    ('Day of Week', '@StartDOW')
]

p.add_tools(hover)
p.legend.location = "top_left"
show(p)
Loading BokehJS ...
In [ ]:
h_trips_hour = trips_df_heavy.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2563645738.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index()
In [ ]:
h_trips_hour_2015 = trips_df_heavy_2015.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
h_trips_hour_agg_2015 = h_trips_hour_2015.groupby(['StartDOW','StartTime_h']).mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2067774375.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  h_trips_hour_agg_2015 = h_trips_hour_2015.groupby(['StartDOW','StartTime_h']).mean().reset_index()
In [ ]:
df_test = trips_df_weekdays.groupby(["StartDate_dmy","StartDOW"]).size()
df_test_2015 = trips_df_2015_weekdays.groupby(["StartDate_dmy","StartDOW"]).size()
In [ ]:
df_test = pd.DataFrame(df_test)
df_test_2015 = pd.DataFrame(df_test_2015)

Spatial Analysis¶

In this section, we have grouped the truck trips by different geographies, including:

Census Tracts
Urban Centers
Traffic Analysis Zones

In [ ]:
import psrcelmerpy
conn = psrcelmerpy.ElmerGeoConn()
tracts = conn.read_geolayer(layer_name='tract2010')
In [ ]:
urban_centers = conn.read_geolayer(layer_name='urban_centers')
In [ ]:
taz = conn.read_geolayer(layer_name='taz2010')
In [ ]:
mic = conn.read_geolayer(layer_name='micen')
In [ ]:
safety_rest_area=conn.read_geolayer(layer_name='safety_rest_area')
In [ ]:
private_truck_stop = conn.read_geolayer(layer_name='private_truck_stop')
In [ ]:
trips_d = gpd.GeoDataFrame(
    trips_df, geometry=gpd.points_from_xy(trips_df.EndLocLon, trips_df.EndLocLat))
trips_d.crs = "EPSG:4326"

trips_d_2015 = gpd.GeoDataFrame(
    trips_df_2015, geometry=gpd.points_from_xy(trips_df_2015.EndLocLon, trips_df_2015.EndLocLat))
trips_d_2015.crs = "EPSG:4326"
total_d=len(trips_d)
total_d_2015=len(trips_d_2015)
In [ ]:
#creating geo data frame for trips destinations during weekdays only
trips_d_weekdays = gpd.GeoDataFrame(
    trips_df_weekdays, geometry=gpd.points_from_xy(trips_df_weekdays.EndLocLon, trips_df_weekdays.EndLocLat))
trips_d_weekdays.crs = "EPSG:4326"

trips_d_2015_weekdays = gpd.GeoDataFrame(
    trips_df_2015_weekdays, geometry=gpd.points_from_xy(trips_df_2015_weekdays.EndLocLon, trips_df_2015_weekdays.EndLocLat))
trips_d_2015_weekdays.crs = "EPSG:4326"
In [ ]:
trips_o = gpd.GeoDataFrame(
    trips_df, geometry=gpd.points_from_xy(trips_df.StartLocLon, trips_df.StartLocLat))
trips_o.crs = "EPSG:4326"

trips_o_2015 = gpd.GeoDataFrame(
    trips_df_2015, geometry=gpd.points_from_xy(trips_df_2015.StartLocLon, trips_df_2015.StartLocLat))
trips_o_2015.crs = "EPSG:4326"
In [ ]:
tracts = tracts.to_crs(epsg=4326)
urban_centers = urban_centers.to_crs(epsg=4326)
taz = taz.to_crs(epsg=4326)
mic = mic.to_crs(epsg=4326)
safety_rest_area = safety_rest_area.to_crs(epsg=4326)
private_truck_stop = private_truck_stop.to_crs(epsg=4326)
In [ ]:
hex_map= gpd.read_file('Y:/Centers Monitoring/Systems Monitoring/2024/Data/1_3_Population/New File Geodatabase.gdb', layer='hex_grid_4mile_region')
In [ ]:
hex_map=hex_map.to_crs(epsg=4326)
In [ ]:
hex_d = gpd.sjoin(hex_map, trips_d, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
taz = taz.to_crs(epsg=4326)
In [ ]:
tract_trips_o = gpd.sjoin(tracts, trips_o, how='inner', op='intersects')
tract_trips_d = gpd.sjoin(tracts, trips_d, how='inner', op='intersects')

tract_trips_o_2015 = gpd.sjoin(tracts, trips_o_2015, how='inner', op='intersects')
tract_trips_d_2015 = gpd.sjoin(tracts, trips_d_2015, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
uc_trips_o = gpd.sjoin(urban_centers, trips_o, how='inner', op='intersects')
uc_trips_d = gpd.sjoin(urban_centers, trips_d, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
uc_trips_o_2015 = gpd.sjoin(urban_centers, trips_o_2015, how='inner', op='intersects')
uc_trips_d_2015 = gpd.sjoin(urban_centers, trips_d_2015, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
mic_trips_o = gpd.sjoin(mic, trips_o, how='inner', op='intersects')
mic_trips_d = gpd.sjoin(mic, trips_d, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
mic_trips_o_2015 = gpd.sjoin(mic, trips_o_2015, how='inner', op='intersects')
mic_trips_d_2015 = gpd.sjoin(mic, trips_d_2015, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
taz_trips_o = gpd.sjoin(taz, trips_o, how='inner', op='intersects')
taz_trips_d = gpd.sjoin(taz, trips_d, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
test3 = tracts.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
taz_trips_d_weekdays = gpd.sjoin(taz, trips_d_weekdays, how='inner', op='intersects')
c:\Users\SChildress\AppData\Local\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3400: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
tract_trips_o=tract_trips_o.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
tract_trips_d=tract_trips_d.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
hex_d.columns
Out[ ]:
Index(['Id', 'GRID_ID', 'Shape_Length', 'Shape_Area', 'geometry',
       'index_right', 'TripId', 'DeviceId', 'ProviderId', 'Mode', 'StartDate',
       'StartWDay', 'EndDate', 'EndWDay', 'StartLocLat', 'StartLocLon',
       'EndLocLat', 'EndLocLon', 'GeospatialType', 'ProviderType',
       'ProviderDrivingProfile', 'VehicleWeightClass', 'ProbeSourceType',
       'OriginZoneName', 'DestinationZoneName', 'EndpointType',
       'TripMeanSpeedKph', 'TripMaxSpeedKph', 'TripDistanceMeters',
       'MovementType', 'OriginCbg', 'DestCbg', 'StartTimezone', 'EndTimezone',
       'WaypointFreqSec', 'StartQk', 'EndQk', 'StartDate_upd', 'StartDate_dmy',
       'StartTime_h', 'EndDate_upd', 'EndDate_dmy', 'EndTime_h', 'StartDOW',
       'EndDOW'],
      dtype='object')
In [ ]:
hex_trips_d=hex_d.groupby('GRID_ID')['GRID_ID'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
tract_trips_o_2015=tract_trips_o_2015.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
tract_trips_d_2015=tract_trips_d_2015.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
uc_trips_o=uc_trips_o.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
uc_trips_d=uc_trips_d.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
mic.columns
mic_trips_d.columns
uc_trips_d.columns
urban_centers.columns
Out[ ]:
Index(['OBJECTID', 'id', 'name', 'area_feet', 'perimeter_', 'acres',
       'hectares', 'area', 'perimeter', 'adoptdate', 'SDE_STATE_ID',
       'category', 'Shape', 'geometry'],
      dtype='object')
In [ ]:
uc_trips_o_2015=uc_trips_o_2015.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
uc_trips_d_2015=uc_trips_d_2015.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
mic_trips_o=mic_trips_o.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_d=mic_trips_d.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
mic_trips_o_2015=mic_trips_o_2015.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_d_2015=mic_trips_d_2015.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
taz_trips_o=taz_trips_o.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_d=taz_trips_d.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
In [ ]:
taz_trips_d_weekday=taz_trips_d_weekdays.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_d_weekday["trips_per_weekday"] = taz_trips_d_weekday["count"]/23.0

Urban Centers¶

When comparing Freight Volumes between Urban Centers in PSRC region, Seattle Downtown stands out with about 22,000 trip destinations and origins.

Redmond-Overlake (187 %), Everett (92 %), and Tukwila (63 %) saw the biggest increase in freight trip destinations between 2015 and 2019 data.

In [ ]:
uc_trips_change = pd.merge(uc_trips_d_2015,uc_trips_d,  how="outer", on=["name"])
uc_trips_change =uc_trips_change.rename(columns={"name": "UC Name", "count_y": "2019 Freight Destinations",
                                "count_x": "2015 Freight Destinations"})
uc_trips_change['delta,%'] = (uc_trips_change['2019 Freight Destinations']-uc_trips_change['2015 Freight Destinations'])*1.0/uc_trips_change['2015 Freight Destinations']
uc_trips_change.sort_values(by='delta,%', ascending=False)
uc_trips_change_acres=pd.merge(uc_trips_change, urban_centers, left_on='UC Name', right_on="name" )
uc_trips_change_acres['2019 Freight Destinations per Acre']=uc_trips_change_acres['2019 Freight Destinations']/uc_trips_change_acres['area']
uc_trips_change_acres[["UC Name", "2019 Freight Destinations", '2019 Freight Destinations per Acre']]
Out[ ]:
UC Name 2019 Freight Destinations 2019 Freight Destinations per Acre
0 Puyallup Downtown 795 3.692344111146305214088281727
1 Auburn 1436 6.143489189125876400589344387
2 Bremerton 980 1.655976856192771363761354688
3 Federal Way 1231 6.163308840777417713431228427
4 University Place 1489 3.096248965232608097127729555
5 Kent 1070 3.659901589378871976233076029
6 Redmond-Overlake 4401 8.477278644980492216883336126
7 Lakewood 1670 5.016976482121796281685622948
8 Bothell Canyon Park 2681 4.760743570539478048956476584
9 Redmond Downtown 2859 6.605999015564738097519794029
10 Issaquah 3041 6.556192192946767440425799515
11 Greater Downtown Kirkland 3367 5.974592357440193956224954162
12 Seattle Northgate 2242 5.487325864697739663674172768
13 Puyallup South Hill 2935 3.473234307652406031429039958
14 Seattle Uptown 3929 11.73659774796639551870150390
15 Silverdale 3160 3.725962047715938484471249296
16 Burien 2184 5.089121550843779442953697358
17 Renton 3624 5.983526767514238481529287302
18 Seattle South Lake Union 4873 13.55523874772783693737493054
19 Everett 6867 10.38885099373818927714495058
20 SeaTac 6089 6.882555841562988739086366087
21 Seattle University Community 4290 5.693646778622940677944580403
22 Bellevue 5516 13.44768715998037421716736892
23 Kirkland Totem Lake 4349 5.165671377876530122203370342
24 Lynnwood 4135 5.415040528184491542456576598
25 Tacoma Mall 3053 5.311559327518764848856856226
26 Tacoma Downtown 7313 5.305193667415323837516987097
27 Tukwila 10885 12.74478381301056619015188302
28 Seattle First Hill/Capitol Hill 7695 8.412490109666598553374425904
29 Seattle Downtown 22289 23.87069931551246933980923970

Manufacturing Industrial Centers¶

The majority of Manufacturing Industrial Centers saw a freight growth between 2015 and 2019 freight trips data.

Duwamish and Kent MIC saw the biggest volume of freight destinations in both 2015 and 2019.

In [ ]:
mic_trips_change = pd.merge(mic_trips_d_2015,mic_trips_d,  how="outer", on=["mic"])
mic_trips_change =mic_trips_change.rename(columns={"mic": "MIC Name", "count_y": "2019 Freight Destinations",
                                "count_x": "2015 Freight Destinations"})
mic_trips_change['delta,%'] = (mic_trips_change['2019 Freight Destinations']-mic_trips_change['2015 Freight Destinations'])*1.0/mic_trips_change['2015 Freight Destinations']
mic_trips_sum_2015=mic_trips_change["2015 Freight Destinations"].sum()
mic_trips_sum_2019=mic_trips_change["2019 Freight Destinations"].sum()
mic_delta=(mic_trips_sum_2019-mic_trips_sum_2015)/mic_trips_sum_2015
total_delta=(total_d-total_d_2015)/total_d
regional_trips_change=pd.DataFrame([['All Regional', total_d_2015, total_d,total_delta]], columns=['MIC Name', '2015 Freight Destinations', '2019 Freight Destinations', 'delta,%'])
all_mic_trips_change=pd.DataFrame([['All MICs', mic_trips_sum_2015, mic_trips_sum_2019,mic_delta]], columns=['MIC Name', '2015 Freight Destinations', '2019 Freight Destinations', 'delta,%'])
mic_all_trips_change=pd.concat([mic_trips_change, all_mic_trips_change, regional_trips_change])
mic_all_trips_change['2015 Share']= mic_all_trips_change['2015 Freight Destinations']/total_d_2015
mic_all_trips_change['2019 Share']= mic_all_trips_change['2019 Freight Destinations']/total_d
mic_all_trips_change.sort_values(by='delta,%', ascending=False)
mic_all_trips_change.to_csv('C:/GitHub/data-science/inrix/mic_trips_2015_2019.csv')
In [ ]:
mic['mic'].unique
Out[ ]:
<bound method Series.unique of 0                                    Duwamish
1                                    Kent MIC
2    Puget Sound Industrial Center- Bremerton
3                            Ballard-Interbay
4                Paine Field / Boeing Everett
5                              Port of Tacoma
6                              Sumner Pacific
7                                Frederickson
8                                     Cascade
9                               North Tukwila
Name: mic, dtype: object>
In [ ]:
mic_trips_change_acres=pd.merge(mic_all_trips_change, mic, left_on='MIC Name', right_on="mic" )

mic_trips_change_acres['2019 Dest per Acre']=mic_trips_change_acres['2019 Freight Destinations']/mic_trips_change_acres['acres']
mic_trips_change_acres['2015 Dest per Acre']=mic_trips_change_acres['2015 Freight Destinations']/mic_trips_change_acres['acres']
total_acres_region=tracts['land_acres'].sum()
total_acres_mics=mic_trips_change_acres['acres'].sum()

total_acres_region
total_acres_mics
Out[ ]:
Decimal('33525.29212573')
In [ ]:
mic_trips_cols=mic_trips_change_acres[["MIC Name", "2015 Dest per Acre", '2019 Dest per Acre', 'acres']]
mic_trips_cols['Percent Region Area']= mic_trips_cols['acres']/total_acres_region
mic_trips_cols['Percent MIC Area']= mic_trips_cols['acres']/total_acres_mics    
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\406837503.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mic_trips_cols['Percent Region Area']= mic_trips_cols['acres']/total_acres_region
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\406837503.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mic_trips_cols['Percent MIC Area']= mic_trips_cols['acres']/total_acres_mics
In [ ]:
                     
regional_trips_acres=pd.DataFrame([['All Regional', total_d_2015/total_acres_region, total_d/total_acres_region, total_acres_region, 1, 0]], 
                                  columns=['MIC Name', '2015 Dest per Acre', '2019 Dest per Acre', 'acres', 'Percent Region Area', 'Percent MIC Area'])

mic_trips_acres=pd.DataFrame([['All MICs', mic_trips_sum_2015/total_acres_mics, mic_trips_sum_2019/total_acres_mics,total_acres_mics, total_acres_mics/total_acres_region, 1]], 
                                  columns=['MIC Name', '2015 Dest per Acre', '2019 Dest per Acre', 'acres', 'Percent Region Area', 'Percent MIC Area'])



mic_all_trips_acres=pd.concat([mic_trips_cols, regional_trips_acres, mic_trips_acres])

mic_all_trips_acres.to_csv('C:/GitHub/data-science/inrix/mic_trips_acres.csv')
In [ ]:
d_hex_count=pd.merge(hex_trips_d, hex_map, on=['GRID_ID'])

d_by_hex_count = gpd.GeoDataFrame(d_hex_count, crs="EPSG:4326", geometry='geometry')
In [ ]:
d_by_tract_count = pd.merge(tract_trips_d, tracts,on=["geoid10"])
o_by_tract_count = pd.merge(tract_trips_o, tracts,on=["geoid10"])

d_by_tract_count_2015 = pd.merge(tract_trips_d_2015, tracts,on=["geoid10"])
o_by_tract_count_2015 = pd.merge(tract_trips_o_2015, tracts,on=["geoid10"])
In [ ]:
d_by_uc_count = pd.merge(uc_trips_d, urban_centers,on=["name"])
o_by_uc_count = pd.merge(uc_trips_o, urban_centers,on=["name"])
In [ ]:
d_by_taz_count = pd.merge(taz_trips_d, taz,on=["taz"])
o_by_taz_count = pd.merge(taz_trips_o, taz,on=["taz"])
In [ ]:
d_by_taz_count_weekend = pd.merge(taz_trips_d_weekday, taz,on=["taz"])
In [ ]:
d_by_tract_count = gpd.GeoDataFrame(d_by_tract_count, crs="EPSG:4326", geometry='geometry')
o_by_tract_count = gpd.GeoDataFrame(o_by_tract_count, crs="EPSG:4326", geometry='geometry')

d_by_tract_count_2015 = gpd.GeoDataFrame(d_by_tract_count_2015, crs="EPSG:4326", geometry='geometry')
o_by_tract_count_2015 = gpd.GeoDataFrame(o_by_tract_count_2015, crs="EPSG:4326", geometry='geometry')

d_by_uc_count = gpd.GeoDataFrame(d_by_uc_count, crs="EPSG:4326", geometry='geometry')
o_by_uc_count = gpd.GeoDataFrame(o_by_uc_count, crs="EPSG:4326", geometry='geometry')

d_by_taz_count = gpd.GeoDataFrame(d_by_taz_count, crs="EPSG:4326", geometry='geometry')
o_by_taz_count = gpd.GeoDataFrame(o_by_taz_count, crs="EPSG:4326", geometry='geometry')
In [ ]:
d_by_taz_count = gpd.GeoDataFrame(d_by_taz_count, crs="EPSG:4326", geometry='geometry')
o_by_taz_count = gpd.GeoDataFrame(o_by_taz_count, crs="EPSG:4326", geometry='geometry')
In [ ]:
hex_trips_d.to_csv('C:/GitHub/data-science/inrix/truck_trips_by_hex_id.csv')
In [ ]:
d_by_taz_count_weekday = gpd.GeoDataFrame(d_by_taz_count_weekend, crs="EPSG:4326", geometry='geometry')

Truck Trips by Census Tracts¶

The truck trips origins and destinations are concentrated in areas with industrial activity such as:

Kent Freight Valley (25,600 - 53,000 trips in May 2019)
North of Puyallup (40,300 trips in May 2019)
Port of Tacoma (19,600 - 30,500 trips in May 2019)
Port of Seattle (28,400 trips in May 2019)
Auburn (23,200 trips in May 2019)
SeaTac Airport (16,100 trips in May 2019)

2015 data shows similar areas with the high freight trips volume.

In [ ]:
geosource = GeoJSONDataSource(geojson = d_by_hex_count.to_crs(3857).to_json())
output_file("C:/GitHub/data-science/inrix/hex_destinations.html")
output_notebook()
# Define color palettes
tile_provider = get_provider(Vendors.CARTODBPOSITRON)
#tile_provider = get_provider(Vendors.OSM)
palette = brewer['BuPu'][9]
palette = palette[::-1] # reverse order of colors so higher values have darker colors
# Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 4000)
# Define custom tick labels for color bar.
tick_labels = {'0': '0', '50': '50',
 '100':'100', '500':'500', '4000':'4000'}
# Create color bar.
color_bar = ColorBar(color_mapper = color_mapper, 
                     label_standoff = 8,
                     width =400, height = 20,
                     border_line_color = None,
                     location = (0,0), 
                     orientation = 'horizontal',
                     major_label_overrides = tick_labels)
# Create figure object.
p = figure(title = 'Truck Destinations, May 2019', 
           plot_height = 600, plot_width =600, 
           toolbar_location = 'below',
           tools = 'pan, wheel_zoom, box_zoom, reset',
           x_range=(-13580000, -13660000), y_range=(5980000, 6120000) )
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
# Add patch renderer to figure.
states = p.patches('xs','ys', source = geosource,fill_alpha=0.6,
                   fill_color = {'field':'count',
                                 'transform' : color_mapper},
                   line_color = 'gray', 
                   line_width = 0.01)
# Create hover tool
p.add_tools(HoverTool(renderers = [states],
                     tooltips = [('GRID_ID','@GRID_ID'),
                               ('count', '@count')]))
# Specify layout
p.add_layout(color_bar, 'below')
p.add_tile(tile_provider)
show(p)
Loading BokehJS ...

Truck types Destinations by TAZ - Heavy¶

When we grouped heavy truck trips by TAZs, it looks like that 75% of TAZs are getting up to 28.5 trips per month or less than 1 trip per day. While some TAZs are getting up to 13,800 trips per month or about 443 trips per day. That means that the heavy truck trips are pretty concentrated to specific TAZs.

Heavy truck trips are concentrated on the East (along I-90) and South of the PSRC Region. The "hot spots" include:

North of Sumner (13,800 heavy truck trips)
Port of Seattle (6,500 heavy truck trips)
North Kent (4,400 heavy truck trips)
North Bend (4,300 heavy truck trips)

In [ ]:
 
Failed to start the Kernel. 

Unable to start Kernel 'Anaconda3 (Python 3.9.12)' due to a timeout waiting for the ports to get used. 

View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.

truck types Destinations by TAZ - Medium Trucks¶

Medium truck trips are more widely distributed across the PSRC region comparing with heavy truck trips. Some of the biggest hot spots are:

SeaTac airport (15,000 medium truck trips)
Kent Freight Valley
Auburn, North of Puyallup
Port of Tacoma and Port of Seattle

Freight trips Origins/Destinations per capita¶

In [ ]:
 
Failed to start the Kernel. 

Unable to start Kernel 'Anaconda3 (Python 3.9.12)' due to a timeout waiting for the ports to get used. 

View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.